#!/usr/bin/env bash
#
# Launches InSPO training runs for one map/dataset pair, one seed at a time.
#
# For every entry in `seeds`, src/main.py is started under nohup in the
# background with stdout and stderr redirected to a timestamped log file in
# output/<map>/<h5file_suffix>/InSPO/. The script waits for that run to finish
# before starting the next seed.
#
# Run from the repository root: src/main.py and output/ are relative paths.

# Treat references to unset variables as errors so a forgotten or misspelled
# setting cannot silently expand to an empty override.
set -u

config="InSPO"

env_name="sc2"
# maps=("6h_vs_8z" "5m_vs_6m" "2s3z" "3s_vs_5z")
# h5file_suffixs=("medium_replay" "expert" "mixed" "medium")

map="5m_vs_6m"
h5file_suffix="expert"

agent="sep_rnn"

use_td_lambda=False
use_gae=True

# This override used to be passed without the variable ever being assigned, so
# it reached src/main.py with an empty value. A value from the environment is
# still honoured; otherwise an explicit False is sent.
# NOTE(review): False is assumed to be the intended setting - confirm against
# the InSPO config before relying on it.
use_orthogonal="${use_orthogonal:-False}"

cql_coeff="2"

# The same value is passed as both entropy_coef_start and entropy_coef_end.
entropy_coef="0."
# entropy_coef_ends=("0.1")

# The same value is passed as both alpha_start and alpha_end.
alpha="3.0"

softmax_temp="100"
gae_lambda="0.8"
other_policy_pow=1

IS_ratio_min="1"
IS_ratio_max="1"

lr="0.001"   # passed as critic_lr
a_lr="0.001" # passed as actor_lr
alpha_lr=0.0005

auto_alpha=False
target_KL="0.2"

# Not forwarded to src/main.py by this script.
auto_entropy=0

CUDA_DEVICE=3
seeds=("0" "1" "2" "3" "4")

# The GPU selection does not change between seeds, so export it once.
export CUDA_VISIBLE_DEVICES="$CUDA_DEVICE"

# The shell opens the log file before python starts, so the directory has to
# exist up front or every launch fails on the redirection.
output_dir="output/${map}/${h5file_suffix}/InSPO"
mkdir -p -- "$output_dir" || {
    printf 'Cannot create output directory %s\n' "$output_dir" >&2
    exit 1
}

# key=value overrides placed after the literal `with` argument; identical for
# every seed. Kept in an array so each override stays exactly one argument.
overrides=(
    "env_args.map_name=${map}"
    "h5file_suffix=${h5file_suffix}"
    "agent=${agent}"
    "cql_coeff=${cql_coeff}"
    "alpha_start=${alpha}"
    "alpha_end=${alpha}"
    "entropy_coef_start=${entropy_coef}"
    "entropy_coef_end=${entropy_coef}"
    "actor_lr=${a_lr}"
    "critic_lr=${lr}"
    "gae_lambda=${gae_lambda}"
    "softmax_temp=${softmax_temp}"
    "IS_ratio_min=${IS_ratio_min}"
    "IS_ratio_max=${IS_ratio_max}"
    "use_orthogonal=${use_orthogonal}"
    "use_td_lambda=${use_td_lambda}"
    "use_gae=${use_gae}"
    "other_policy_pow=${other_policy_pow}"
    "auto_alpha=${auto_alpha}"
    "alpha_lr=${alpha_lr}"
    "target_KL=${target_KL}"
)

failed_seeds=()

for seed in "${seeds[@]}"; do

    # The timestamp is taken per seed so each log name records its start time.
    current_time=$(date "+%Y.%m.%d-%H.%M.%S")

    output_file="${output_dir}/${map}_${h5file_suffix}_${current_time}_${seed}.txt"

    echo "Output to $output_file"

    nohup python src/main.py \
        --config="$config" --env-config="$env_name" with \
        "${overrides[@]}" \
        "seed=${seed}" > "$output_file" 2>&1 &

    pid=$!

    echo "Running in the background with PID $pid"

    # Seeds run strictly one after another. Waiting on the specific PID also
    # yields the run's exit status, which a bare `wait` would discard.
    status=0
    wait "$pid" || status=$?
    if [[ "$status" -ne 0 ]]; then
        printf 'Seed %s exited with status %s (see %s)\n' \
            "$seed" "$status" "$output_file" >&2
        failed_seeds+=("$seed")
    fi
done

# Remaining seeds still run after a failure; summarise the failures at the end.
if [[ "${#failed_seeds[@]}" -gt 0 ]]; then
    printf 'Failed seeds: %s\n' "${failed_seeds[*]}" >&2
fi
